import json
import re

def clean_calculation(text: str) -> str:
    """Return *text* with every ``<<...>>`` span deleted.

    Matching is non-greedy, so each ``<<`` is paired with the nearest
    following ``>>``. Because ``.`` does not match a newline here, a span
    that crosses a line break is left untouched.
    """
    annotation = re.compile(r'<<.*?>>')
    return annotation.sub('', text)

def extract_final_number(text: str) -> str:
    """Return the last number found on the final line of *text*.

    Surrounding whitespace is stripped from *text* before the final line is
    taken. Numbers may use comma thousands separators (``1,234``) and an
    optional decimal part (``3.50``); commas are removed from the result.
    A leading sign is not part of the match.

    Args:
        text: Free-form text whose last line should contain the number.

    Returns:
        The number as a string without commas, or ``""`` when the last line
        contains no digits.
    """
    last_line = text.strip().rsplit('\n', 1)[-1]

    # The comma-grouped alternative must require at least one ",ddd" group.
    # With "*" it also matched plain digit runs three digits at a time, so
    # "1234" was split into "123" and "4" and the wrong value was returned.
    numbers = re.findall(
        r'\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?',
        last_line,
    )
    if not numbers:
        return ""
    return numbers[-1].replace(',', '')

def process_answer(answer: str) -> str:
    """Rewrite an answer ending in ``#### <result>`` as steps plus a sentence.

    The answer is split on the ``"\\n#### "`` marker. The text before it has
    its ``<<...>>`` calculation annotations removed via
    ``clean_calculation``; the text after it becomes the closing line
    ``The answer is <result>.``.

    Args:
        answer: Raw answer text.

    Returns:
        The reformatted answer, or *answer* unchanged when the marker does
        not occur exactly once.
    """
    parts = answer.split('\n#### ')
    if len(parts) != 2:
        # Missing or repeated marker: the layout is not the expected one,
        # so hand the text back untouched rather than guess.
        return answer

    steps, final_answer = parts
    steps = clean_calculation(steps)

    # Strip the result so a trailing newline or space in the source data
    # does not end up between the value and the closing period.
    return f"{steps}\nThe answer is {final_answer.strip()}."

def convert_to_cot_format(input_file: str, output_file: str, sample_count: int = 300):
    """Convert a JSON file of question/answer records into a plain-text file.

    The JSON document is loaded in full and its first *sample_count* entries
    are used. Each entry is indexed with the keys ``'question'`` and
    ``'answer'``; the answer is passed through ``process_answer``. Every
    entry is rendered as ``Question: <question>`` followed by the processed
    answer on the next line, and entries are separated by one blank line.

    Args:
        input_file: Path of the UTF-8 JSON file to read.
        output_file: Path of the UTF-8 text file to write (overwritten).
        sample_count: Maximum number of leading entries to convert.
    """
    with open(input_file, 'r', encoding='utf-8') as source:
        records = json.load(source)

    # Slicing caps the work at the requested number of samples.
    rendered = [
        f"Question: {record['question']}\n{process_answer(record['answer'])}"
        for record in records[:sample_count]
    ]

    with open(output_file, 'w', encoding='utf-8') as sink:
        sink.write('\n\n'.join(rendered))

if __name__ == '__main__':
    # Script entry point: convert the default dataset file using the default
    # sample count of convert_to_cot_format.
    # NOTE(review): the output path starts with "/", i.e. it points at the
    # filesystem root, while the input path is relative to the working
    # directory - confirm this location is intended.
    output_file = "/reasoning_output_gsm8k_correct.txt"

    convert_to_cot_format("gsm8k.json", output_file)
    print(f"Conversion completed, output file saved to: {output_file}")
